In [22]:
import pandas as pd
import pymc3 as pm
import matplotlib.pyplot as plt
import seaborn as sns
import requests
from pandas.plotting import scatter_matrix
# Plot configuration: enlarge seaborn's font scale to 1.5 and render
# matplotlib figures inline in the notebook.
sns.set(font_scale=1.5)
%matplotlib inline
In [32]:
# Load the gzipped body-fat table indexed by IDNO, then discard whatever
# column comes last in the file.
df = (
    pd.read_csv('BodyFat.csv.gz', index_col='IDNO')
    .pipe(lambda frame: frame.drop(columns=[frame.columns[-1]]))
)
# Preview the first rows as the cell output.
df.head()
Out[32]:
In [37]:
# Pairwise scatter plots of every column in `df`; the trailing semicolon
# keeps the returned axes array out of the cell output.
scatter_matrix(frame=df, figsize=(15, 15));
In [39]:
# Linear model of BODYFAT on NECK alone, specified by formula against `df`.
# `model` and `trace` are rebound by the later model cells, so the plot and
# summary cells always describe whichever model was sampled most recently.
with pm.Model() as model:
    pm.glm.GLM.from_formula('BODYFAT ~ NECK', df)
    # Fixed seed so a fresh Restart & Run All reproduces the same draws.
    trace = pm.sample(5000, random_seed=42)
In [40]:
# Plot the sampled traces for the current `trace`. The trailing semicolon
# suppresses the repr of the returned axes array, matching the other
# traceplot cells in this notebook.
pm.traceplot(trace);
Out[40]:
In [41]:
# Display the summary table for the current `trace` as the cell output.
pm.summary(trace)
Out[41]:
In [42]:
# Linear model of BODYFAT on NECK and THIGH, specified by formula against `df`.
# Rebinds `model` and `trace`, replacing the results of the previous model cell.
with pm.Model() as model:
    pm.glm.GLM.from_formula('BODYFAT ~ NECK + THIGH', df)
    # Fixed seed so a fresh Restart & Run All reproduces the same draws.
    trace = pm.sample(5000, random_seed=42)
In [45]:
# Plot the sampled traces for the current `trace`; the semicolon hides the
# return-value repr.
pm.traceplot(trace);
In [46]:
# Display the summary table for the current `trace` as the cell output.
pm.summary(trace)
Out[46]:
In [47]:
# Linear model of BODYFAT on NECK, THIGH and DENSITY, specified by formula
# against `df`. Rebinds `model` and `trace`.
# NOTE(review): if BODYFAT in this dataset was itself computed from DENSITY,
# including DENSITY as a predictor leaks the target — confirm with the data
# source before interpreting the coefficients.
with pm.Model() as model:
    pm.glm.GLM.from_formula('BODYFAT ~ NECK + THIGH + DENSITY', df)
    # Fixed seed so a fresh Restart & Run All reproduces the same draws.
    trace = pm.sample(5000, random_seed=42)
In [48]:
# Plot the sampled traces for the current `trace`; the semicolon hides the
# return-value repr.
pm.traceplot(trace);
In [49]:
# Display the summary table for the current `trace` as the cell output.
pm.summary(trace)
Out[49]:
In [50]:
# Preview the right-hand side of a formula that uses every column of `df`
# after the first one, joined with ' + '.
# NOTE(review): selection is positional — presumably the first column is the
# response (BODYFAT); verify against the CSV header.
cols = df.columns[1:]
' + '.join(cols)
Out[50]:
In [51]:
# Linear model of BODYFAT on every other column of `df`.
# Predictors are selected by position (everything after the first column), so
# this assumes BODYFAT is the first column — TODO confirm against the CSV header.
# 'D_cat' is excluded when present: a later cell adds that binned helper column
# to `df` in place, and without this guard re-running this cell afterwards
# would silently pull it into the formula.
cols = df.columns[1:].drop('D_cat', errors='ignore')
with pm.Model() as model:
    pm.glm.GLM.from_formula('BODYFAT ~ {}'.format(' + '.join(cols)), df)
    # target_accept=0.9 is unchanged from the original cell; the fixed seed
    # makes the draws reproducible on a fresh Restart & Run All.
    trace = pm.sample(5000, target_accept=0.9, random_seed=42)
In [52]:
# Plot the sampled traces for the current `trace`; the semicolon hides the
# return-value repr.
pm.traceplot(trace);
In [53]:
# Display the summary table for the current `trace` as the cell output.
pm.summary(trace)
Out[53]:
In [57]:
# Bin DENSITY into 5 quantile-based intervals and store the interval labels
# on `df` as the new 'D_cat' column (this adds a column to `df` in place).
df['D_cat'] = pd.qcut(x=df['DENSITY'], q=5)
In [68]:
# One violin plot per measurement, stacked vertically on a shared x axis and
# grouped by the 'D_cat' density bin (needs the 'D_cat' column on `df`).
violin_columns = ['NECK', 'AGE', 'FOREARM', 'CHEST']
fig, ax = plt.subplots(len(violin_columns), 1, figsize=(12, 10), sharex=True)
for panel, measurement in zip(ax, violin_columns):
    sns.violinplot(x='D_cat', y=measurement, data=df, ax=panel)
plt.tight_layout()
In [ ]: